Daten einlesen und Dataframe erstellen
# Collect the full paths of every file inside the "Data" folder
files <- list.files("./Data/", full.names = TRUE)
# Read each file with read.csv and stack the results into one data frame
# (the folder is expected to hold the CSV files for the years 2015-2019)
df <- ldply(files, read.csv)
# Open the combined data frame in the data viewer
View(df)
# Tally how often each halftime (HTR) and fulltime (FTR) result occurs
df_htr <- count(df, HTR)
df_ftr <- count(df, FTR)
# Column headers for the combined result table
col_headings <- c("Result", "Halftime", "Fulltime")
# Put the halftime and fulltime tallies side by side, one row per outcome.
# NOTE(review): the labels assume count() returns its rows in the order
# A, D, H and that HTR/FTR contain no other values - confirm against the data.
df_results <- setNames(
  data.frame(c("Away", "Draw", "Home"), df_htr$n, df_ftr$n),
  col_headings
)
# Grouped bar chart comparing how often each outcome occurs at halftime and
# at fulltime. The x mapping must use the column name `Result` exactly as it
# is set on df_results; the former `~Results` pointed at a non-existent column
# and made the plot fail when it was built.
fig <- plot_ly(
  df_results,
  x = ~Result, y = ~Halftime, type = "bar", name = "Half Time Score"
) %>%
  add_trace(y = ~Fulltime, name = "Full Time Score") %>%
  layout(yaxis = list(title = "Count"), barmode = "group")
# Display the figure
fig
# Combine the halftime and fulltime result codes into a single column,
# separated by one space (e.g. "H H")
df[["result"]] <- paste(df[["HTR"]], df[["FTR"]], sep = " ")
print("Example: H H = home team is winning at halftime and also wins the game at fulltime")
[1] "Example: H H = home team is winning at halftime and also wins the game at fulltime"
# Bar chart of every halftime/fulltime combination and how often it occurs,
# with the bars ordered from most to least frequent
plot_ly(
  data = df %>%
    count(result) %>%
    mutate(result = fct_reorder(result, n, .desc = TRUE)),
  x = ~result, y = ~n, text = ~n, textposition = "auto"
) %>%
  add_bars() %>%
  layout(
    title = "How are the different game progresses distributed?",
    xaxis = list(title = "Game Progress"),
    yaxis = list(title = "Amount")
  )
# Share of every halftime/fulltime combination, in percent of all games
# (rounded to two decimals)
df_count_results <- summarise(
  group_by(df, result),
  count_result = round(n() / nrow(df) * 100, digits = 2)
)
# Donut chart of those shares
plot_ly(df_count_results, labels = ~result, values = ~count_result) %>%
  add_pie(hole = 0.4, color = I("white")) %>%
  layout(
    title = "What is the probability of each game progress?",
    xaxis = list(title = "Game Progress"),
    yaxis = list(title = "Probability %")
  )
# Percentage of rows in df1 that are also present in df2.
#
# df1: data frame with the reference set of rows (the denominator).
# df2: data frame with the rows of interest (the numerator); the callers in
#      this script always pass a filtered subset of df1.
# Returns nrow(df2) as a percentage of nrow(df1), rounded to two decimals.
calc_prob <- function(df1, df2) {
  n_reference <- nrow(df1)
  n_matching <- nrow(df2)
  round(100 / n_reference * n_matching, digits = 2)
}
# Games in which the home team leads at halftime
df_ht_home <- filter(df, HTR == "H")
# ... and of those, the games the home team also wins at fulltime
df_ft_home <- filter(df_ht_home, FTR == "H")
# Percentage of halftime home leads that end in a home win
home_win_prob <- calc_prob(df_ht_home, df_ft_home)
cat("Probability that the home team wins the game if they are leading at half time: ", home_win_prob, "%")
Probability that the home team wins the game if they are leading at half time: 82.55 %
# Games in which the away team leads at halftime
df_ht_away <- filter(df, HTR == "A")
# ... and of those, the games the away team also wins at fulltime
df_ft_away <- filter(df_ht_away, FTR == "A")
# Percentage of halftime away leads that end in an away win
away_win_prob <- calc_prob(df_ht_away, df_ft_away)
cat("Probability that the away team wins the game if they are leading at half time: ", away_win_prob, "%")
Probability that the away team wins the game if they are leading at half time: 72.03 %
# Games that are level at halftime
df_ht_draw <- filter(df, HTR == "D")
# ... and of those, the games that are still level at fulltime
df_ft_draw <- filter(df_ht_draw, FTR == "D")
# Percentage of halftime draws that also end in a draw
draw_prob <- calc_prob(df_ht_draw, df_ft_draw)
cat("Probability that the game ends in a draw if the halftime result is also a draw: ", draw_prob, "%")
Probability that the game ends in a draw if the halftime result is also a draw: 36.45 %
# Games that are level at halftime and are won by the home team at fulltime
df_ht_draw_ft_home_win <- filter(df_ht_draw, FTR == "H")
# Percentage of halftime draws that end in a home win
home_win_after_ht_draw_prob <- calc_prob(df_ht_draw, df_ht_draw_ft_home_win)
cat("Probability that the home team wins if the halftime result is a draw: ", home_win_after_ht_draw_prob, "%")
Probability that the home team wins if the halftime result is a draw: 38.03 %
# Probability that the team leading at half time wins the game.
# Pooled over home and away leads: all games the halftime leader went on to
# win, divided by all games that had a halftime leader. The earlier version
# averaged the two conditional probabilities weighted by the number of wins
# (df_ft_*) instead of the number of halftime leads (df_ht_*), which skews
# the result towards the larger of the two probabilities. Working from the
# raw counts also avoids compounding the rounding of the two inputs.
ht_ft_win_prob <- round(
  100 * (nrow(df_ft_home) + nrow(df_ft_away)) /
    (nrow(df_ht_home) + nrow(df_ht_away)),
  digits = 2
)
cat("Probability that the team leading at half time wins the entire game: ", ht_ft_win_prob, "%")
Probability that the team leading at half time wins the entire game: 78.41 %
LS0tCnRpdGxlOiAiRGF0YXZpeiBtaXQgUGxvdGx5IFBMIERhdGEiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCiMjIEZyYWdlc3RlbGx1bmc6ICJEaWUgTWFuc2NoYWZmdCwgZGllIHp1ciBIYWxiemVpdCB2b3JuZSBsaWVndCwgZ2V3aW5udCBtaXQgZWluZXIgQ2hhbmNlIHZvbiBtaW5kZXN0ZW5zIDc1JSBhdWNoIGRhcyBTcGllbC4gRmFsbHMgenVyIEhhbGJ6ZWl0IHVuZW50c2NoaWVkZW4gaXN0LCBnZXdpbm50IGVoZXIgZGFzIEhlaW10ZWFtIChDaGFuY2UgPiAzMy40JSkuIgoKCkRhZsO8ciBuZWhtZW4gd2lyIGRlbiBEYXRhY2FtcCBEYXRlbnNhdHogW1NvY2NlciBEYXRhXShodHRwczovL2FwcC5kYXRhY2FtcC5jb20vd29ya3NwYWNlL2RhdGFzZXRzL2RhdGFzZXQtcHl0aG9uLXNvY2NlcikKCkFscyBFaW5mw7xocnVuZyB3ZXJkZW4gd2lyIGF1ZiBEYXRhY2FtcCBmb2xnZW5kZSBLdXJzZSBkdXJjaGdlaGVuOgoKLSBbSW50ZXJhY3RpdmUgRGF0YSBWaXN1YWxpemF0aW9uIHdpdGggcGxvdGx5XShodHRwczovL2FwcC5kYXRhY2FtcC5jb20vbGVhcm4vY291cnNlcy9pbnRlcmFjdGl2ZS1kYXRhLXZpc3VhbGl6YXRpb24td2l0aC1wbG90bHktaW4tcikKCi0gW0ludGVybWVkaWF0ZSBJbnRlcmFjdGl2ZSBEYXRhIFZpc3VhbGl6YXRpb24gd2l0aCBwbG90bHldKGh0dHBzOi8vYXBwLmRhdGFjYW1wLmNvbS9sZWFybi9jb3Vyc2VzL2ludGVyYWN0aXZlLWRhdGEtdmlzdWFsaXphdGlvbi13aXRoLXBsb3RseS1pbi1yKQoKYGBge3J9CiMgQmlibGlvdGhla2VuIGltcG9ydGllcmVuCmxpYnJhcnkoInBsb3RseSIpCmxpYnJhcnkoInBseXIiKQpsaWJyYXJ5KCJkcGx5ciIpCmxpYnJhcnkoImZvcmNhdHMiKQpsaWJyYXJ5KCJSQ29sb3JCcmV3ZXIiKQpgYGAKIyMgRGF0ZW4gZWlubGVzZW4gdW5kIERhdGFmcmFtZSBlcnN0ZWxsZW4KYGBge3J9CiMgTGlzdCBmaWxlcyBpbiBmb2xkZXIgIkRhdGEiCmZpbGVzIDwtIGxpc3QuZmlsZXMocGF0aD0iLi9EYXRhLyIsIHBhdHRlcm49TlVMTCwgYWxsLmZpbGVzPUZBTFNFLCBmdWxsLm5hbWVzPVRSVUUpCgojIENyZWF0ZSBEYXRhZnJhbWUgd2l0aCBhbGwgY3N2IGZyb20geWVhcnMgMjAxNS0yMDE5CmRmIDwtIGxkcGx5KC5kYXRhID0gZmlsZXMsIC5mdW4gPSByZWFkLmNzdikKClZpZXcoZGYpCmBgYAoKYGBge3J9CiMgQ291bnQgZnJlcXVlbmN5IG9mIGhhbHRpbWUgJiBmdWxsdGltZSByZXN1bHRzCmRmX2h0ciA8LSBkZiAlPiUgY291bnQoSFRSKQpkZl9mdHIgPC0gZGYgJT4lIGNvdW50KEZUUikKYGBgCgpgYGB7cn0KIyBDcmVhdGUgZGF0YWZyYW1lIHdpdGggaGFsdHRpbWUgJiBmdWxsdGltZSByZXN1bHQgYW1vdW50cwpkZl9yZXN1bHRzIDwtIGRhdGEuZnJhbWUoYygiQXdheSIsICJEcmF3IiwgIkhvbWUiKSwgYyhkZl9odHIkbiksIGMoZGZfZnRyJG4pKQoKIyBSZW5hbWUgY29sdW1uIGhlYWRlcnMKY29sX2hlYWRpbmdzIDwtIGMoJ1Jlc3VsdCcsJ0hhbGZ0aW1lJywnRnVsbHRpbWUnKQpuYW1l
cyhkZl9yZXN1bHRzKSA8LSBjb2xfaGVhZGluZ3MKCiMgUGxvdCBncm91cGVkIGJhciBjaGFydCB0byB2aXN1YWxpemUgaGFsZnRpbWUgJiBmdWxsdGltZSByZXN1bHRzCmZpZyA8LSBwbG90X2x5KAogIGRmX3Jlc3VsdHMsIHggPSB+UmVzdWx0cywgeSA9IH5IYWxmdGltZSwgdHlwZSA9ICdiYXInLCBuYW1lID0gJ0hhbGYgVGltZSBTY29yZScpICU+JSAKICBhZGRfdHJhY2UoeSA9IH5GdWxsdGltZSwgbmFtZSA9ICdGdWxsIFRpbWUgU2NvcmUnKSAlPiUKICBsYXlvdXQoeWF4aXMgPSBsaXN0KHRpdGxlID0gJ0NvdW50JyksIGJhcm1vZGUgPSAnZ3JvdXAnKQoKZmlnCmBgYApgYGB7cn0KIyBtZXJnZSBIVFIgJiBGVFIgdG8gMSBjb2x1bW4KZGYkcmVzdWx0IDwtIHBhc3RlKGRmJEhUUiwgZGYkRlRSKQoKcHJpbnQoIkV4YW1wbGU6IEggSCA9IGhvbWUgdGVhbSBpcyB3aW5uaW5nIGF0IGhhbGZ0aW1lIGFuZCBhbHNvIHdpbnMgdGhlIGdhbWUgYXQgZnVsbHRpbWUiKQpgYGAKCmBgYHtyfQojIFBsb3QgYWxsIGRpZmZlcmVudCBnYW1lIHByb2dyZXNzZXMgYW5kIHRoZWlyIGFtb3VudApkZiAlPiUKICBjb3VudChyZXN1bHQpICU+JQogIG11dGF0ZShyZXN1bHQgPSBmY3RfcmVvcmRlcihyZXN1bHQsIG4sIC5kZXNjID0gVFJVRSkpICU+JQogIHBsb3RfbHkoeCA9IH5yZXN1bHQsIHkgPSB+biwgdGV4dCA9IH5uLCB0ZXh0cG9zaXRpb24gPSAnYXV0bycpICU+JQogIGFkZF9iYXJzKCkgJT4lCiAgbGF5b3V0KHhheGlzID0gbGlzdCh0aXRsZSA9ICJHYW1lIFByb2dyZXNzIiksCiAgICAgICAgIHlheGlzID0gbGlzdCh0aXRsZSA9ICJBbW91bnQiKSwKICAgICAgICAgdGl0bGUgPSAiSG93IGFyZSB0aGUgZGlmZmVyZW50IGdhbWUgcHJvZ3Jlc3NlcyBkaXN0cmlidXRlZD8iKQpgYGAKYGBge3J9CiMgR3JvdXAgYnkgZ2FtZSBvdXRjb21lICYgY2FsY3VsYXRlIHByb2JhYmlsaXR5IG9mIGFsbCBvdXRjb21lcwpkZl9jb3VudF9yZXN1bHRzIDwtIGRmICU+JSAKICBncm91cF9ieShyZXN1bHQpICU+JSAKICBzdW1tYXJpc2UoY291bnRfcmVzdWx0ID0gcm91bmQobigpIC8gbnJvdyhkZikgKiAxMDAsIGRpZ2l0cyA9IDIpKQoKZGZfY291bnRfcmVzdWx0cyAlPiUKICBwbG90X2x5KGxhYmVscyA9IH5yZXN1bHQsIHZhbHVlcyA9IH5jb3VudF9yZXN1bHQpICU+JQogIGFkZF9waWUoaG9sZSA9IDAuNCwgY29sb3IgPSBJKCJ3aGl0ZSIpKSAlPiUKICBsYXlvdXQoeGF4aXMgPSBsaXN0KHRpdGxlID0gIkdhbWUgUHJvZ3Jlc3MiKSwKICAgICAgICAgeWF4aXMgPSBsaXN0KHRpdGxlID0gIlByb2JhYmlsaXR5ICUiKSwKICAgICAgICAgdGl0bGUgPSAiV2hhdCBpcyB0aGUgcHJvYmFiaWxpdHkgb2YgZWFjaCBnYW1lIHByb2dyZXNzPyIpCmBgYAoKYGBge3J9CiMgQ2FsY3VsYXRlIHByb2JhYmlsaXR5IApjYWxjX3Byb2IgPC0gZnVuY3Rpb24oZGYxLCBkZjIpIHsKICBwcm9iIDwtIHJvdW5kKCgxMDAgLyBucm93KGRmMSkgKiBucm93KGRmMikpLCBkaWdpdHMgPSAy
KQogIHJldHVybihwcm9iKQp9CmBgYAoKYGBge3J9CiMgRmlsdGVyIGhvbWUgdGVhbXMgd2lubmluZyBhdCBoYWxmdGltZQpkZl9odF9ob21lIDwtIGRmICU+JSAKICBmaWx0ZXIoSFRSID09ICJIIikKCiMgRmlsdGVyIGhvbWUgdGVhbXMgd2lubmluZyBhdCBoYWxmdGltZSAmIGZ1bGx0aW1lCmRmX2Z0X2hvbWUgPC0gZGZfaHRfaG9tZSAlPiUgCiAgZmlsdGVyKEZUUiA9PSAiSCIpCgpob21lX3dpbl9wcm9iIDwtIGNhbGNfcHJvYihkZl9odF9ob21lLCBkZl9mdF9ob21lKQoKY2F0KCJQcm9iYWJpbGl0eSB0aGF0IHRoZSBob21lIHRlYW0gd2lucyB0aGUgZ2FtZSBpZiB0aGV5IGFyZSBsZWFkaW5nIGF0IGhhbGYgdGltZTogIiwgaG9tZV93aW5fcHJvYiwgIiUiKQpgYGAKCmBgYHtyfQojIEZpbHRlciBhd2F5IHRlYW1zIHdpbm5pbmcgYXQgaGFsZnRpbWUKZGZfaHRfYXdheSA8LSBkZiAlPiUgCiAgZmlsdGVyKEhUUiA9PSAiQSIpCgojIEZpbHRlciBhd2F5IHRlYW1zIHdpbm5pbmcgYXQgaGFsZnRpbWUgJiBmdWxsdGltZQpkZl9mdF9hd2F5IDwtIGRmX2h0X2F3YXkgJT4lIAogIGZpbHRlcihGVFIgPT0gIkEiKQoKYXdheV93aW5fcHJvYiA8LSBjYWxjX3Byb2IoZGZfaHRfYXdheSwgZGZfZnRfYXdheSkKCmNhdCgiUHJvYmFiaWxpdHkgdGhhdCB0aGUgYXdheSB0ZWFtIHdpbnMgdGhlIGdhbWUgaWYgdGhleSBhcmUgbGVhZGluZyBhdCBoYWxmIHRpbWU6ICIsIGF3YXlfd2luX3Byb2IsICIlIikKYGBgCgpgYGB7cn0KIyBGaWx0ZXIgZHJhdyBhdCBoYWxmdGltZQpkZl9odF9kcmF3IDwtIGRmICU+JSAKICBmaWx0ZXIoSFRSID09ICJEIikKCiMgRmlsdGVyIGRyYXcgYXQgaGFsZnRpbWUgJiBmdWxsdGltZQpkZl9mdF9kcmF3IDwtIGRmX2h0X2RyYXcgJT4lIAogIGZpbHRlcihGVFIgPT0gIkQiKQoKZHJhd19wcm9iIDwtIGNhbGNfcHJvYihkZl9odF9kcmF3LCBkZl9mdF9kcmF3KQoKY2F0KCJQcm9iYWJpbGl0eSB0aGF0IHRoZSBnYW1lIGVuZHMgaW4gYSBkcmF3IGlmIHRoZSBoYWxmdGltZSByZXN1bHQgaXMgYWxzbyBhIGRyYXc6ICIsIGRyYXdfcHJvYiwgIiUiKQpgYGAKCmBgYHtyfQojIEZpbHRlciBkcmF3IGF0IGhhbGZ0aW1lICYgdGhlIGhvbWUgdGVhbSB3aW5uaW5nIGF0IGZ1bGx0aW1lCmRmX2h0X2RyYXdfZnRfaG9tZV93aW4gPC0gZGZfaHRfZHJhdyAlPiUKICBmaWx0ZXIoRlRSID09ICJIIikKCmhvbWVfd2luX2FmdGVyX2h0X2RyYXdfcHJvYiA8LSBjYWxjX3Byb2IoZGZfaHRfZHJhdywgZGZfaHRfZHJhd19mdF9ob21lX3dpbikKCmNhdCgiUHJvYmFiaWxpdHkgdGhhdCB0aGUgaG9tZSB0ZWFtIHdpbnMgaWYgdGhlIGhhbGZ0aW1lIHJlc3VsdCBpcyBhIGRyYXc6ICIsIGhvbWVfd2luX2FmdGVyX2h0X2RyYXdfcHJvYiwgIiUiKQpgYGAKCmBgYHtyfQojIFByb2JhYmlsaXR5IHRoYXQgdGhlIHRlYW0gd2lubmluZyBhdCBoYWxmIHRpbWUgd2lucyB0aGUgZ2FtZQpodF9mdF93aW5fcHJvYiA8LSByb3VuZCgoKGhvbWVfd2luX3Byb2Ig
KiBucm93KGRmX2Z0X2hvbWUpKSArIChhd2F5X3dpbl9wcm9iICogbnJvdyhkZl9mdF9hd2F5KSkpIC8gKG5yb3coZGZfZnRfaG9tZSkgKyBucm93KGRmX2Z0X2F3YXkpKSwgZGlnaXRzID0gMikKCmNhdCgiUHJvYmFiaWxpdHkgdGhhdCB0aGUgdGVhbSBsZWFkaW5nIGF0IGhhbGYgdGltZSB3aW5zIHRoZSBlbnRpcmUgZ2FtZTogIiwgaHRfZnRfd2luX3Byb2IsICIlIikKYGBgCgpgYGB7cn0KCmBgYAoK